Purpose
This script is intended to be run as an initial first pass on unfiltered datasets. Users may want to inspect sample distributions and/or run the script interactively to get a feel for the data.
Setup
Parameters
| Parameter | Value |
|---|---|
| parallel | multicore |
| seurat_raw | ./../processedData/Su_7142_210727B6_raw.rds |
| organism | mouse |
| sample_id | SampleID |
| decontamX_detect | TRUE |
| decontamX_filter | TRUE |
| decontamX_thresh | 0.6 |
| doublet_detect | TRUE |
| doublet_filter | TRUE |
| filter | hard |
| nFeature.ll | 500 |
| nFeature.ul | 5000 |
| percent.mt.ul | 10 |
| nCount.ll | 2000 |
| nCount.ul | 40000 |
| transform | standard |
| var_regress | none |
| cell_cycle | TRUE |
| cluster_algorithm | Louvain2 |
| cluster_resolution | c(0.15, 0.2, 0.5, 0.8, 1.1, 1.4) |
| anno_db | c(“ImmGen”, “hpca”) |
| pref_ann | ImmGen |
| biomart_rds | ./../processedData/h2m_biomart.rds |
| ann_collapse_n | 200 |
| downsample | FALSE |
| downsample_cells | 20000 |
| exp_metadata | ./../processedData/scRNAseq_metadata_clean.csv |
| seurat_processed | ./results/QB_SingleCell_qQC/seurat_05Apr2022_0418UTC_10k.rds |
| num_var_feature | 10000 |
# Determining if doublet detecting and filtering will occur
if (isTRUE(params$doublet_filter)){
dbl <- params$doublet_detect
dbl_dtct=TRUE
} else {
if (isFALSE(params$doublet_detect)) {
dbl_dtct=FALSE
} else {dbl_dtct=TRUE}
}
if (isTRUE(params$decontamX_filter)){
dcx_dtct=TRUE
} else {
if (isFALSE(params$decontamX_detect)) {
dcx_dtct=FALSE
} else {dcx_dtct=TRUE}
}
if (params$var_regress=="none"){
var_regress=NULL
} else {
var_regress <- params$var_regress
}
if ("cc" %in% params$var_regress) {
cc_regress=TRUE
var_regress=NULL
} else {
cc_regress=FALSE
}
# This feature not yet supported.
if (!isFALSE(params$exp_metadata)) {
cov_analysis <- TRUE
} else {
cov_analysis <- FALSE
}
if (params$parallel=="multicore") {
bpparam <- MulticoreParam()
} else {
bpparam <- SerialParam()
}
Output Files
- The final seurat file will be saved to: ./results/QB_SingleCell_qQC/seurat_05Apr2022_0418UTC_10k.rds
Load data
seurat_raw <- readRDS(params$seurat_raw)
Plot setup
theme_sara <- function(){
theme_bw(base_size=10)+
theme(axis.text=element_text(color="black"),
panel.background=element_rect(color="white"),
strip.text = element_text(size=12),
strip.background = element_rect(fill="white"))
}
theme_sara_90 <- function() {
theme_bw(base_size=18)+
theme(axis.text.x=element_text(angle=90,hjust = 1,vjust = 0.5),
axis.text=element_text(color="black"),
panel.background=element_rect(color="black"),
strip.text = element_text(size=12),
strip.background = element_rect(fill="white"))
}
sid <- params$sample_id
seurat_raw@meta.data$SampleID <- seurat_raw@meta.data[[sid]]
s.cols <- c(rcartocolor::carto_pal(n=4, "Teal"),
rcartocolor::carto_pal(n=4, "Emrld"),
rcartocolor::carto_pal(n=4, "BurgYl"),
rcartocolor::carto_pal(n=4, "PinkYl"),
rcartocolor::carto_pal(n=4, "Purp"))
num_samples <- length(levels(as.factor(seurat_raw@meta.data$SampleID)))
if (num_samples <= 20) {
sample.cols <- sample(s.cols, size=num_samples, replace=FALSE)
} else {
sample.cols <- colorRampPalette(s.cols)(num_samples)
}
names(sample.cols) <- levels(as.factor(seurat_raw@meta.data$SampleID))
heatmap.colfun <- microViz::heat_palette(
palette = "Lisbon",
breaks = 100,
range = c(0,1),
sym = FALSE,
rev = FALSE)
heatmap.colfun <- microViz::heat_palette(
palette = "Lisbon",
breaks = 100,
range = c(0,1),
sym = FALSE,
rev = FALSE)
dbl.cols <- c(`eSinglet`="snow2", `eDoublet`="#EE1289")
dcx.cols <- c(`Native`="azure2", `Ambient`="chartreuse1")
unikn_blue_red.cols <- c(rev(pal_karpfenblau[[5]]), "white", pal_bordeaux[[5]])
ref.cols <- list()
ref.cols[["SingleR_ImmGen"]] <- c(`B cells` = "#F0A0FF",
`Basophils` = "#9370DB",
`Eosinophils`= "#B452CD",
`DC` = "#9DCC00",
`Epithelial cells` = "#87CEFA",
`Endothelial cells` = "#6495ED",
`Fibroblasts` = "navy",
`ILC` = "#191919",
`Macrophages` = "#1C8356",
`Mast cells` = "#E9Debb",
`Monocytes` = "#81C57A",
`Microglia` = "#EEB4B4",
`Neutrophils` = "#b10da1",
`NK cells` = "#8F7C00",
`NKT` = "#FEAF16",
`Others` = "gray40",
`Stem cells` = "red",
`Stromal cells` = "#F08080",
`T cells` = "#FE902EFF",
`Tgd` = "#FFEE33")
ref.cols[["SingleR_HPCA"]] <- c(`Astocyte`="grey",
`B_cell`="#F0A0FF",
`BM`="chocolate4",
`BM & Prog.`="bisque3",
`CMP`="#DC8F71",
`DC`="#9DCC00",
`Endothelial_cells`="#6495ED",
`Epithelial_cells`="#87CEFA",
`Erythroblast`="red4",
`Fibroblasts`="navy",
`Gametocytes`="mediumpurple1",
`HSC`="red",
`Macrophage`="#1C8356",
`Monocyte`="#81C57A",
`Myelocyte`="green",
`Neutrophils`="#b10da1",
`NK_cell`="#8F7C00",
`Osteoblasts`="cyan3",
`Platelets`="#A5DAC8",
`T_cells`="#FE902EFF",
`Others` = "gray40")
ref.cols[["SingleR_hpca"]] <- ref.cols[["SingleR_HPCA"]]
QC
Calculate Percent Mitochondial genes to be used in downstream filtering
# the prefixes below should work for human or mouse.
seurat_raw <- PercentageFeatureSet(seurat_raw, pattern = "^MT-|^mt-", col.name = "percent.mt")
Create 20k cell subsetted seurat for multimode test. The raw data will be downsampled after filtering.
seurat_raw_20k <- subset(seurat_raw, downsample=20000)
Distributions
Look for samples with skewed, abnormal or bimodal distributions
p1 <- RidgePlot(seurat_raw, group.by="SampleID",
features="nFeature_RNA",
same.y.lims=TRUE, sort=TRUE, log=TRUE)
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1 <- p1 + scale_x_continuous(labels = scales::scientific)
p1 <- p1 + theme(axis.text=element_text(size=11))
p2 <- RidgePlot(seurat_raw, group.by="SampleID",
features="nCount_RNA",
same.y.lims=TRUE, sort=TRUE, log=TRUE)
p2 <- p2 + scale_fill_manual(values=sample.cols)
p2 <- p2 + scale_x_continuous(labels = scales::scientific, n.breaks=4)
p2 <- p2 + theme(axis.text=element_text(size=11))
mt.ul <- max(seurat_raw@meta.data$percent.mt)+5
p3 <- RidgePlot(seurat_raw, group.by="SampleID",
features="percent.mt", y.max=50, sort=TRUE)
p3 <- p3 + scale_fill_manual(values=sample.cols)
p3 <- p3 + xlim(0, mt.ul)
p3 <- p3 + theme(axis.text=element_text(size=11))
cowplot::plot_grid(p1 + theme(legend.position="none", axis.title.y=element_blank()),
p2 + theme(legend.position="none", axis.title.y=element_blank()),
p3 + theme(legend.position="none", axis.title.y=element_blank()),
align = 'vh',
labels = c("A", "B", "C"),
hjust = -1,
nrow = 1)
Modality test
features <- c("nFeature_RNA", "nCount_RNA", "percent.mt")
names(features) <- features
modetest.list <- BiocParallel::bplapply(features,
function(f) multimode::modetest(seurat_raw_20k@meta.data[[f]]),
BPPARAM=MulticoreParam())
pvalues_logical.list <- lapply(modetest.list, function(mt) mt[["p.value"]] <= 0.05)
lapply(modetest.list, function(t) t)
## $nFeature_RNA
##
## Ameijeiras-Alonso et al. (2019) excess mass test
##
## data: seurat_raw_20k@meta.data[[f]]
## Excess mass = 0.034961, p-value < 2.2e-16
## alternative hypothesis: true number of modes is greater than 1
##
##
## $nCount_RNA
##
## Ameijeiras-Alonso et al. (2019) excess mass test
##
## data: seurat_raw_20k@meta.data[[f]]
## Excess mass = 0.0086293, p-value < 2.2e-16
## alternative hypothesis: true number of modes is greater than 1
##
##
## $percent.mt
##
## Ameijeiras-Alonso et al. (2019) excess mass test
##
## data: seurat_raw_20k@meta.data[[f]]
## Excess mass = 0.002569, p-value = 0.382
## alternative hypothesis: true number of modes is greater than 1
features.test <- features[unlist(pvalues_logical.list)]
modes.list <- BiocParallel::bplapply(features.test,
function(f) multimode::locmodes(seurat_raw@meta.data[[f]],
mod0=2,
display=FALSE),
BPPARAM=bpparam)
antimode.list <- lapply(modes.list,
function(m) round(m$locations[[2]], digits=2))
Inspect the distributions - if the antimode falls in the valley, it is potentially a good cutoff.
p1 <- ggplot(data=seurat_raw@meta.data, aes(x=nFeature_RNA))
p1 <- p1 + geom_density(fill="slateblue4") + labs(x="Number of genes")
if (isTRUE(pvalues_logical.list[["nFeature_RNA"]])) {
p1 <- p1 + geom_vline(xintercept=antimode.list[["nFeature_RNA"]])
p1 <- p1 + labs(caption=paste("antimode =", antimode.list[["nFeature_RNA"]]))
} else {
p1 <- p1 + labs(caption="No antimode to display")
}
p1 <- p1 + theme(plot.caption=element_text(face="italic"))
p2 <- ggplot(data=seurat_raw@meta.data, aes(x=nCount_RNA))
p2 <- p2 + geom_density(fill="magenta4") + labs(x="Number of UMIs") + theme(axis.title.y=element_blank())
if (isTRUE(pvalues_logical.list[["nCount_RNA"]])) {
p2 <- p2 + geom_vline(xintercept=antimode.list[["nCount_RNA"]])
p2 <- p2 + labs(caption=paste("antimode =", antimode.list[["nCount_RNA"]]))
} else {
p2 <- p2 + labs(caption="No antimode to display")
}
p2 <- p2 + theme(plot.caption=element_text(face="italic"))
mt.ul <- max(seurat_raw@meta.data$percent.mt)+2
p3 <- ggplot(data=seurat_raw@meta.data, aes(x=percent.mt))
p3 <- p3 + geom_density(fill="olivedrab3") + labs(x="% mitochondrial genes") + theme(axis.title.y=element_blank())
if (isTRUE(pvalues_logical.list[["percent.mt"]])) {
p3 <- p3 + geom_vline(xintercept=antimode.list[["percent.mt"]])
p3 <- p3 + labs(caption=paste("antimode =", antimode.list[["percent.mt"]]))
} else {
p3 <- p3 + labs(caption="No antimode to display")
}
p3 <- p3 + theme(plot.caption=element_text(face="italic"))
p3 <- p3 +xlim(0, mt.ul)
cowplot::plot_grid(p1,
p2,
p3,
align = 'vh',
labels = c("A", "B", "C"),
hjust = -1,
nrow = 1)
Stats
Calculate median, median absolute deviation (mad), and outliers. Look for samples that are outliers.
median.list <- lapply(features,
function(f) median(seurat_raw@meta.data[[f]],
na.rm=TRUE))
mad.list <- lapply(features,
function(f) mad(seurat_raw@meta.data[[f]],
na.rm=TRUE))
outliers.list <- lapply(features,
function(f) scater::isOutlier(seurat_raw@meta.data[[f]],
type="both"))
outliers.list <- lapply(outliers.list, function(o) attr(o, "thresholds"))
sample.median.df <- seurat_raw@meta.data %>%
group_by(SampleID) %>%
summarize(nFeature_RNA_median=median(nFeature_RNA, na.rm=TRUE),
nCount_RNA_median=median(nCount_RNA, na.rm=TRUE),
percent.mt_median=median(percent.mt, na.rm=TRUE))
sample.median.df$nFeature_RNA_outlr <- ifelse(sample.median.df$nFeature_RNA_median <= median.list[["nFeature_RNA"]] - 3*mad.list[["nFeature_RNA"]] || sample.median.df$nFeature_RNA_median >= median.list[["nFeature_RNA"]] + 3*mad.list[["nFeature_RNA"]],
"FLAG",
"OK")
sample.median.df$nCount_RNA_outlr <- ifelse(sample.median.df$nCount_RNA_median <= median.list[["nCount_RNA"]] - 3*mad.list[["nCount_RNA"]] || sample.median.df$nFeature_RNA_median >= median.list[["nCount_RNA"]] + 3*mad.list[["nCount_RNA"]],
"FLAG",
"OK")
sample.median.df$percent.mt_outlr <- ifelse(sample.median.df$percent.mt_median >= median.list[["percent.mt"]] + 3*mad.list[["percent.mt"]],
"FLAG",
"OK")
kable(sample.median.df, booktabs = TRUE) %>%
kable_styling(font_size = 12)
| SampleID | nFeature_RNA_median | nCount_RNA_median | percent.mt_median | nFeature_RNA_outlr | nCount_RNA_outlr | percent.mt_outlr |
|---|---|---|---|---|---|---|
| 770128 | 3177.0 | 11656.0 | 3.204618 | OK | OK | OK |
| 770163 | 2935.0 | 11111.5 | 2.043499 | OK | OK | OK |
| 770174 | 3149.0 | 13141.0 | 2.396746 | OK | OK | OK |
| 770184 | 1967.0 | 5575.0 | 4.338221 | OK | OK | OK |
| 770200 | 2870.0 | 9982.0 | 4.486458 | OK | OK | OK |
| 770203 | 1510.5 | 4255.5 | 2.682422 | OK | OK | OK |
| 770216 | 2255.0 | 6919.0 | 3.052563 | OK | OK | OK |
| 770223 | 2313.0 | 7512.0 | 4.445599 | OK | OK | OK |
| 770224 | 3054.0 | 10652.0 | 4.864111 | OK | OK | OK |
| 770225 | 1493.0 | 3557.0 | 3.825524 | OK | OK | OK |
| 770226 | 2578.5 | 8665.0 | 3.463999 | OK | OK | OK |
| 770227 | 2474.0 | 8087.5 | 3.443354 | OK | OK | OK |
| 770231 | 3185.0 | 11553.0 | 3.050032 | OK | OK | OK |
| 770232 | 3081.0 | 12380.0 | 2.700411 | OK | OK | OK |
| 770238 | 2229.5 | 6877.0 | 5.361471 | OK | OK | OK |
| 770240 | 926.0 | 1759.5 | 3.589263 | OK | OK | OK |
| 770264 | 2945.0 | 11507.5 | 2.264529 | OK | OK | OK |
| 770267 | 947.0 | 1902.0 | 2.812986 | OK | OK | OK |
| 770271 | 2522.5 | 9251.5 | 3.257304 | OK | OK | OK |
| 770279 | 1795.0 | 5992.0 | 3.531969 | OK | OK | OK |
Set filtering limits
Setting limits based on parameters. Cells will be filtered for doublets and ambient RNA after doublet detection and ambient RNA detection, if performed.
if (params$filter=="multimodal") {
if (modetest.list[["nFeature_RNA"]][["p.value"]] <= 0.05) {
nFeature_RNA.ll <- round(antimode.list[["nFeature_RNA"]], digits=0)
} else {
nFeature_RNA.ll <- round(median.list[["nFeature_RNA"]]-3*mad.list[["nFeature_RNA"]], digits=0)
}
# use hard for other limits
nFeature_RNA.ul <- params$nFeature.ul
nCount_RNA.ul <- params$nCount.ul
percent.mt.ul <- params$percent.mt.ul
}
if (params$filter=="mad") {
nFeature_RNA.ll <- round(median.list[["nFeature_RNA"]]-3*mad.list[["nFeature_RNA"]],
digits=0)
nFeature_RNA.ul <- round(median.list[["nFeature_RNA"]]+3*mad.list[["nFeature_RNA"]],
digits=0)
percent.mt.ul <- round(median.list[["percent.mt"]]+3*mad.list[["percent.mt"]],
digits=0)
nCount_RNA.ul <- round(median.list[["nCount_RNA"]]+3*mad.list[["nCount_RNA"]],
digits=0)
}
if (params$filter=="hard") {
nFeature_RNA.ll <- params$nFeature.ll
nFeature_RNA.ul <- params$nFeature.ul
nCount_RNA.ul <- params$nCount.ul
percent.mt.ul <- params$percent.mt.ul
}
# nCount stays the same regardless
nCount_RNA.ll <- params$nCount.ll
Below are the filtering thresholds
- Median nFeature 2130
- MAD nFeature 1813.2198
- Median nCount_RNA 6413
- MAD nCount_RNA 7458.9606
- Median percent.mt 3.3044197
- MAD percent.mt 2.1523486
- Lower limit for number of genes 500
- Upper limit for number of genes 5000
- Lower limit for number of UMIs 2000
- Upper limit for number of UMIs 40000
- Upper limit for percentage mt 10
Doublet detection
Prior to doublet detection a minimum amount of filtering is performed. If cells will be downsampled, a subset of cells are utilized for doublet detection. Otherwise, doublet detection is performed on entire data set. Doublets are identified from each sample individually.
Currently, doublet detection is performed via the R package scds. The hybrid score utilized here incorporates two doublet scores: - bcds - artificial doublets are created and a doublet classifier is trained. Cells are assigned a probability of falling into the artificial doublet class. - cxds - expected co-expression of gene-pairs based on frequency is utilized to assign a rank-order based score to all gene-pairs. Scores for co-occuring gene-pairs for each cell are summed. The hybrid score is derived by summing the normalized bcds and cxds scores.
# Seurat was the only package that provided workable code for conversion between sce and seurat objects
seurat_dbl <- seurat_raw
seurat_dbl@meta.data$rmv <- ifelse(seurat_dbl@meta.data$nFeature_RNA < 200, "remove", "Keep")
seurat_dbl@meta.data$rmv <- ifelse(seurat_dbl@meta.data$percent.mt >= 20, "remove", seurat_dbl@meta.data$rmv)
seurat_dbl <- subset(seurat_dbl, subset=`rmv`=="Keep")
if (isTRUE(params$downsample)) {
ds=params$downsample_cells*2
seurat_dbl <- subset(seurat_dbl, downsample=ds)
}
samples <- seurat_dbl@meta.data$SampleID
seurat.list <- SplitObject(seurat_dbl, split.by="SampleID")
sce.list <- lapply(seurat.list,
function(s) Seurat::as.SingleCellExperiment(s))
sce.list <- lapply(sce.list,
function(sce) scds::cxds_bcds_hybrid(sce))
## [04:22:47] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:23:36] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:24:30] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:25:21] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:26:06] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:26:41] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:27:20] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:28:02] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:28:51] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:30:02] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:30:57] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:31:45] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:32:42] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:33:14] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:33:26] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:34:09] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:35:05] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:37:08] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:38:27] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:39:20] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
seurat.list <- lapply(sce.list,
function(sce) Seurat::as.Seurat(sce,
counts = "counts"))
num_seurat_obj <- length(seurat.list)
seurat_raw <- merge(x=seurat.list[[1]], y=seurat.list[2:num_seurat_obj])
seurat_raw@meta.data$hybrid_call <- ifelse(seurat_raw@meta.data$hybrid_score >= 1, "eDoublet", "eSinglet")
p <- ggplot(data=seurat_raw@meta.data, aes(x=hybrid_score))
p <- p + geom_density(fill=dbl.cols[["eDoublet"]], color="#AA005FFF")
p <- p + theme_sara() + labs(x="scds hybrid score")
p
Sample plots
Boxplots
A look at the distribution of doublet scores
dat <- as.data.frame(seurat_raw@meta.data)
dbl_data.df <- data.frame(SampleID=dat$SampleID,
hybrid_score=dat$hybrid_score,
hybrid_call=dat$hybrid_call)
median.df <- dbl_data.df %>%
group_by(SampleID) %>%
summarize(median_h=median(hybrid_score, na.rm=TRUE))
median.dt <- setDT(median.df)
# Get x-axis orders
setorder(median.dt, median_h)
h.ord <- median.dt$SampleID
p1 <- ggplot(data=dbl_data.df, aes(y=hybrid_score, x=SampleID, fill=SampleID))
p1 <- p1 + geom_boxplot() + labs(y="scds hybrid score")
p1 <- p1 + theme_sara() + scale_x_discrete(limits=h.ord)
p1 <- p1 + theme(axis.text.x=element_text(angle=90),
axis.title.x=element_blank(),
legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1
Hybrid call
dbl_data.df$hybrid_call <- factor(dbl_data.df$hybrid_call, levels=c("eSinglet", "eDoublet"))
p <- ggplot(data=dbl_data.df, aes(x=SampleID, fill=hybrid_call))
p <- p + geom_bar(position="fill", color="black")
p <- p + scale_fill_manual(values=dbl.cols, name="Hybrid call")
p <- p + theme_sara()
p <- p + theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle=90))
p
Density plot
p1 <- ggplot(data=dbl_data.df, aes(x=hybrid_score, y=SampleID, fill=SampleID))
p1 <- p1 + ggridges::geom_density_ridges()
p1 <- p1 + theme_sara() + labs(x="scds hybrid score")
p1 <- p1 + theme(axis.title.y=element_blank(),
legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1
Decontamination
This function is recently incorporated and not completely recommended unless you know what you are doing. The DecontX package is utilized for contaminant detection. A threshold of 0.5 is recommended. DecontX can be run with all samples, with each sample as a batch. If trouble with decontamination continues and raw counts are available, consider using those as background.
sce <- as.SingleCellExperiment(seurat_raw)
sce <- celda::decontX(sce, batch=sce$SampleID)
# If you do not reset these, bad things will happen.
sce$nCount_RNA <- NULL
sce$nFeature_RNA <- NULL
if (isTRUE(params$decontamX_filter)) {
r <- round(decontXcounts(sce))
seurat_raw <- CreateSeuratObject(counts=r,
meta.data=as.data.frame(colData(sce)))
} else {
seurat_raw <- CreateSeuratObject(counts=counts(sce),
meta.data=as.data.frame(colData(sce)))
}
seurat_raw@meta.data$decontX_contamination <- sce$decontX_contamination
seurat_raw@meta.data$contam_status <- ifelse(seurat_raw@meta.data$decontX_contamination >= params$decontamX_thresh,
"Ambient",
"Native")
p <- ggplot(data=seurat_raw@meta.data, aes(x=decontX_contamination))
p <- p + geom_density(fill=dcx.cols[["Ambient"]], color="#62C707FF")
p <- p + theme_sara() + labs(x="Percent contamination")
p
Sample plots
Boxplots
A look at the distribution of decontamination scores. The score ranges from 0 to 1 and refers to the percentage of cells that are contaminated.
dat <- as.data.frame(seurat_raw@meta.data)
median.df <- dat %>%
group_by(SampleID) %>%
summarize(median_X=median(decontX_contamination, na.rm=TRUE))
median.dt <- setDT(median.df)
# Get x-axis orders
setorder(median.dt, median_X)
X.ord <- median.dt$SampleID
p1 <- ggplot(data=dat, aes(y=decontX_contamination, x=SampleID, fill=SampleID))
p1 <- p1 + geom_boxplot()
p1 <- p1 + labs(y="Percent contamination")
p1 <- p1 + theme_sara() + scale_x_discrete(limits=X.ord)
p1 <- p1 + theme(axis.text.x=element_text(angle=90),
axis.title.x=element_blank(),
legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1
Contaminated threshold
dat$contam_status <- factor(dat$contam_status, levels=c("Native", "Ambient"))
p <- ggplot(data=dat, aes(x=SampleID, fill=contam_status))
p <- p + labs(y="Percent contamination")
p <- p + geom_bar(position="fill", color="black") + scale_x_discrete(limits=X.ord)
p <- p + scale_fill_manual(values=dcx.cols, name="Contamination status")
p <- p + theme_sara()
p <- p + theme(axis.title.x = element_blank(),
axis.text.x=element_text(angle=90))
p
Density plot
p1 <- ggplot(data=dat, aes(x=decontX_contamination, y=SampleID, fill=SampleID))
p1 <- p1 + ggridges::geom_density_ridges()
p1 <- p1 + labs(x="Percent contamination")
p1 <- p1 + theme_sara()
p1 <- p1 + theme(axis.title.y=element_blank(),
legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1
Filtered cells
If doublet detection was performed (and data will be downsampled), the cells are already downsampled to 80k and filtered to exclude percent mitochondrial genes above 20% and number of genes below 200.
seurat_raw@meta.data$Filter <- ifelse(seurat_raw@meta.data$nFeature_RNA < nFeature_RNA.ll | seurat_raw@meta.data$nFeature_RNA > nFeature_RNA.ul,
"Remove",
"Keep")
# Percent mt
seurat_raw@meta.data$Filter <- ifelse(seurat_raw@meta.data$percent.mt > percent.mt.ul,
"Remove",
seurat_raw@meta.data$Filter)
#nCount_RNA
seurat_raw@meta.data$Filter <- ifelse(seurat_raw@meta.data$nCount_RNA < nCount_RNA.ll | seurat_raw@meta.data$nCount_RNA > nCount_RNA.ul,
"Remove",
seurat_raw@meta.data$Filter)
if (isTRUE(params$doublet_filter)) {
seurat_raw@meta.data$Filter <- ifelse(seurat_raw@meta.data$hybrid_call=="eDoublet",
"Remove",
seurat_raw@meta.data$Filter)
}
if (isTRUE(params$decontamX_filter)) {
seurat_raw@meta.data$Filter <- ifelse(seurat_raw@meta.data$contam_status=="Ambient",
"Remove",
seurat_raw@meta.data$Filter)
}
summary(as.factor(seurat_raw@meta.data$Filter))
## Keep Remove
## 96281 83676
All metrics
keep_remove.cols <- c(`Keep`="lavenderblush3",
`Remove`="#DC143C")
p1 <- ggplot(data=seurat_raw@meta.data, aes(x=nCount_RNA, y=nFeature_RNA, color=Filter))
p1 <- p1 + geom_point() + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul) + geom_vline(xintercept=nCount_RNA.ul)
p1 <- p1 + theme_sara()
p1 <- p1 + scale_color_manual(values=keep_remove.cols)
p2 <- ggplot(data=seurat_raw@meta.data, aes(x=percent.mt, y=nFeature_RNA, color=Filter))
p2 <- p2 + geom_point() + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul) + geom_vline(xintercept = percent.mt.ul)
p2 <- p2 + theme_sara()
p2 <- p2 + scale_color_manual(values=keep_remove.cols)
p3 <- ggplot(data=seurat_raw@meta.data, aes(x=percent.mt, y=nCount_RNA, color=Filter))
p3 <- p3 + geom_point() + geom_vline(xintercept = percent.mt.ul) + geom_hline(yintercept=nCount_RNA.ul)
p3 <- p3 + geom_hline(yintercept=nCount_RNA.ll)
p3 <- p3 + theme_sara()
p3 <- p3 + scale_color_manual(values=keep_remove.cols)
legend <- cowplot::get_legend(p1 + theme(legend.box.margin = margin(0, 0, 0, 12)))
prow <- cowplot::plot_grid(p1 + theme(legend.position="none"),
p2 + theme(legend.position="none"),
p3 + theme(legend.position="none"),
align = 'vh',
labels = c("A", "B", "C"),
hjust = -1,
nrow = 1)
cowplot::plot_grid(prow, legend, rel_widths = c(4, .6))
Idents(seurat_raw) <- seurat_raw@meta.data$orig.ident
seurat <- subset(seurat_raw, subset=Filter=="Keep")
Check filtering
It never hurts.
Scatterplots
p1 <- ggplot(data=seurat@meta.data, aes(x=nCount_RNA, y=nFeature_RNA, color=Filter))
p1 <- p1 + geom_point()
p1 <- p1 + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul)
p1 <- p1 + geom_vline(xintercept=nCount_RNA.ll) + geom_vline(xintercept=nCount_RNA.ul)
p1 <- p1 + theme_sara()
p1 <- p1 + scale_color_manual(values=keep_remove.cols)
p2 <- ggplot(data=seurat@meta.data, aes(x=percent.mt, y=nFeature_RNA, color=Filter))
p2 <- p2 + geom_point() + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul)
p2 <- p2 + geom_vline(xintercept = percent.mt.ul)
p2 <- p2 + theme_sara()
p2 <- p2 + scale_color_manual(values=keep_remove.cols)
p3 <- ggplot(data=seurat@meta.data, aes(x=percent.mt, y=nCount_RNA, color=Filter))
p3 <- p3 + geom_point() + geom_vline(xintercept = percent.mt.ul)
p3 <- p3 + geom_hline(yintercept=nCount_RNA.ul) + geom_hline(yintercept=nCount_RNA.ll)
p3 <- p3 + theme_sara()
p3 <- p3 + scale_color_manual(values=keep_remove.cols)
legend <- cowplot::get_legend(p1 + theme(legend.box.margin = margin(0, 0, 0, 12)))
prow <- cowplot::plot_grid(p1 + theme(legend.position="none"),
p2 + theme(legend.position="none"),
p3 + theme(legend.position="none"),
align = 'vh',
labels = c("A", "B", "C"),
hjust = -1,
nrow = 1)
cowplot::plot_grid(prow, legend, rel_widths = c(4, .6))
Density plots
p1 <- ggplot(data=seurat@meta.data, aes(x=nFeature_RNA))
p1 <- p1 + geom_density(fill="slateblue4") + labs(x="Number of genes")
p2 <- ggplot(data=seurat@meta.data, aes(x=nCount_RNA))
p2 <- p2 + geom_density(fill="magenta4") + labs(x="Number of UMIs") + theme(axis.title.y=element_blank())
p3 <- ggplot(data=seurat@meta.data, aes(x=percent.mt))
p3 <- p3 + geom_density(fill="olivedrab3") + labs(x="% mitochondrial genes") + theme(axis.title.y=element_blank())
prow <- cowplot::plot_grid(p1,
p2,
p3,
align = 'vh',
labels = c("A", "B", "C"),
hjust = -1,
nrow = 1)
cowplot::plot_grid(prow, rel_widths = c(4, .4))
Inspect samples after filtering
Look for outlier samples to make note of.
meta.df <- as.data.table(seurat@meta.data)
summary.df <- meta.df %>%
dplyr::group_by(SampleID) %>%
dplyr::summarise(n=n(),
median_nFeature=median(nFeature_RNA, na.rm=TRUE),
medan_nCount=median(nCount_RNA, na.rm=TRUE),
median_percentMT=median(percent.mt, na.rm=TRUE))
setDT(summary.df)
summary.melt.df <- data.table::melt(summary.df, id.vars="SampleID")
setorder(summary.df, n)
x.ord <- summary.df$SampleID
p1 <- ggplot(data=summary.df, aes(x=SampleID, y=n, fill=SampleID))
p1 <- p1 + geom_bar(stat="identity", color="black")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1 <- p1 + theme_sara_90() + theme(axis.text=element_text(size=9), axis.title.x = element_blank(), axis.title.y=element_text(size=11))
p1 <- p1 + scale_x_discrete(limits=x.ord)
p1 <- p1 + labs(y="Number of cells")
p2 <- ggplot(data=meta.df, aes(x=SampleID, y=nFeature_RNA, fill=SampleID))
p2 <- p2 + geom_boxplot()
p2 <- p2 + scale_fill_manual(values=sample.cols)
p2 <- p2 + theme_sara_90() + theme(axis.text=element_text(size=9), axis.title.x = element_blank(), axis.title.y=element_text(size=11))
p2 <- p2 + scale_x_discrete(limits=x.ord)
p2 <- p2 + labs(y="Number of Genes (nFeature_RNA)")
p3 <- ggplot(data=meta.df, aes(x=SampleID, y=nCount_RNA, fill=SampleID))
p3 <- p3 + geom_boxplot()
p3 <- p3 + scale_fill_manual(values=sample.cols)
p3 <- p3 + theme_sara_90() + theme(axis.text=element_text(size=9), axis.title.x = element_blank(), axis.title.y=element_text(size=11))
p3 <- p3 + scale_x_discrete(limits=x.ord)
p3 <- p3 + labs(y="Number of UMIs (nCount_RNA)")
p4 <- ggplot(data=meta.df, aes(x=SampleID, y=percent.mt, fill=SampleID))
p4 <- p4 + geom_boxplot()
p4 <- p4 + scale_fill_manual(values=sample.cols)
p4 <- p4 + theme_sara_90() + theme(axis.text=element_text(size=9),
axis.title.x = element_blank(), axis.title.y=element_text(size=11))
p4 <- p4 + scale_x_discrete(limits=x.ord)
p4 <- p4 + labs(y="Percentage MT")
#legend <- cowplot::get_legend(p2 + theme(legend.box.margin = margin(0, 0, 0, 12)))
cowplot::plot_grid(p1 + theme(legend.position="none"),
p2 + theme(legend.position="none"),
p3 + theme(legend.position="none"),
p4 + theme(legend.position="none"),
align = 'vh',
labels = c("A", "B", "C", "D"),
hjust = -1,
nrow = 2)
Normalize and scale data
Only standard transformation is recommended at this time, due to statistical suitability and computational efficiency. SCTransformation is possible but has a very long run time.
Idents(seurat) <- seurat@meta.data$orig.ident
seurat <- subset(seurat, downsample=params$downsample_cells)
num_cells <- ncol(seurat)
num_genes <- nrow(seurat)
The seurat dataset used for this analysis has:
- 96281 cells
- 32285 genes
if (params$transform=="standard") {
std_prefix <- "RNA_snn_res."
seurat <- NormalizeData(seurat,
normalization.method = "LogNormalize",
scale.factor = 10000)
seurat <- FindVariableFeatures(object=seurat,
selection.method = "vst",
nfeatures = params$num_var_feature)
seurat <- ScaleData(object = seurat,
vars.to.regress=var_regress,
verbose = TRUE)
} else if (params$transform=="sct_v2") {
# SCTransform must be run on individual samples
std_prefix <- "SCT_snn_res."
seurat.list <- SplitObject(seurat_dbl, split.by="SampleID")
seurat.list <- lapply(seurat.list,
function(s) SCTransform(s,
vars.to.regress=var_regress,
variable.features.n=params$num_var_feature,
vst.flavor="v2",
method="glmGamPoi",
min_cells=20))
num_seurat_obj <- length(seurat.list)
seurat <- merge(x=seurat.list[[1]], y=seurat.list[2:num_seurat_obj])
} else if (params$transform=="sct_v1") {
std_prefix <- "SCT_snn_res."
seurat.list <- SplitObject(seurat_dbl, split.by="SampleID")
seurat.list <- BiocParallel::bplapply(seurat.list,
function(s) SCTransform(s,
vars.to.regress=var_regress,
variable.features.n=params$num_var_feature),
BPPARAM=MulticoreParam())
num_seurat_obj <- length(seurat.list)
seurat <- merge(x=seurat.list[[1]], y=seurat.list[2:num_seurat_obj])
}
Cell Cycle score
Cell cycle score is only supported for human and mouse data sets.
if (params$organism=="human") {
### [1] Create G2M marker gene list (i.e. genes associated with the G2M phase of the
### cell cycle using Seurat's built-in cc.genes (cell cycle) genes list
g2m.genes <- cc.genes$g2m.genes
s.genes <- cc.genes$s.genes
g2m <- rownames(seurat)[rownames(seurat) %in% g2m.genes]
} else if (params$organism=="mouse") {
cc_file <- RCurl::getURL("https://raw.githubusercontent.com/hbc/tinyatlas/master/cell_cycle/Mus_musculus.csv")
cell_cycle_genes <- read.csv(text = cc_file)
ah <- AnnotationHub::AnnotationHub()
# Access the Ensembl database for organism
ahDb <- AnnotationHub::query(ah,
pattern = c("Mus musculus", "EnsDb"),
ignore.case = TRUE)
# Acquire the latest annotation files
id <- ahDb %>%
mcols() %>%
rownames() %>%
tail(n = 1)
# Download the appropriate Ensembldb database
edb <- ah[[id]]
# Extract gene-level information from database
annotations <- genes(edb,
return.type = "data.frame")
# Select annotations of interest
annotations <- annotations %>%
dplyr::select(gene_id, gene_name, seq_name, gene_biotype, description)
# Get gene names for Ensembl IDs for each gene
cell_cycle_markers <- dplyr::left_join(cell_cycle_genes, annotations, by = c("geneID" = "gene_id"))
# Acquire the S phase genes
s.genes <- cell_cycle_markers %>%
dplyr::filter(phase == "S") %>%
dplyr::pull("gene_name")
# Acquire the G2M phase genes
g2m.genes <- cell_cycle_markers %>%
dplyr::filter(phase == "G2/M") %>%
dplyr::pull("gene_name")
}
### [2] Calculate G2M marker module score.
seurat <- CellCycleScoring(seurat,
s.features = s.genes, ### Genes associated with s-phase
g2m.features = g2m.genes, ### Genes associated with G2M phase
set.ident = TRUE)
Proportion of cells per sample in each phase
Heatmap
tab<-prop.table(table(seurat@meta.data$Phase, seurat@meta.data$SampleID),2)
nr <- length(levels(as.factor(seurat@meta.data$Phase)))
nc <- length(levels(as.factor(seurat@meta.data$SampleID)))
ComplexHeatmap::Heatmap(tab,
cluster_columns = TRUE,
show_column_dend = TRUE,
row_names_gp = grid::gpar(fontsize = 12),
column_names_gp = grid::gpar(fontsize=12),
cluster_rows = TRUE,
show_row_dend = FALSE,
col = heatmap.colfun,
height = unit(5, "mm")*nr,
width = unit(5, "mm")*nc,
name="Proportion")
Barplot
cell_cycle.cols <- c("#01617E","#8B9934","#E04883")
names(cell_cycle.cols) <- levels(as.factor(seurat@meta.data$Phase))
p <- ggplot(data=seurat@meta.data, aes(x=SampleID, fill=Phase))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=cell_cycle.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
axis.text=element_text(size=9),
legend.text=element_text(size=9),
legend.title=element_text(size=9),
axis.title.y=element_text(size=10),
strip.text=element_text(size=10),
legend.position="bottom")
p
Cell cycle regression
If cell cycle was previously identified as having an effect on the clustering and the differences are not biological, the difference between G2M and S phase scores will be calculated and used as the variable to regress. For most cases, it is not recommended to regress out cell cycle. Other variables will also be regressed out at this time.
seurat@meta.data$cc_dif <- seurat@meta.data$S.Score - seurat@meta.data$G2M.Score
var_regress <- gsub("cc", "cc_dif", params$var_regress)
seurat <- ScaleData(object = seurat,
vars.to.regress=var_regress,
verbose = TRUE)
Annotation
Prepare reference data
The Human Primary Cell Atlas (HPCA, human) and ImmGen (mouse) are both good choices. Mouse data can also be annotated with HPCA. HPCA is written as HPCA when the data is from human or when human reference gene names are converted to mouse gene names via biomart. HPCA is denoted as hpca when the reference data is converted to mouse names by changing to sentence case.
ref.data.list <- list()
if (params$organism=="human") {
if ("HPCA" %in% params$anno_db) {
ref.data.list[["HPCA"]] <- celldex::HumanPrimaryCellAtlasData(ensembl=FALSE)
}
}
if (params$organism=="mouse") {
if ("HPCA" %in% params$anno_db) {
#human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
#mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")
ref.data <- celldex::HumanPrimaryCellAtlasData(ensembl=FALSE)
hpa_human.genes <- rownames(assay(ref.data))
human_to_mouse.df <- readRDS(params$biomart_rds)
#human_to_mouse.df <- getLDS(attributes=c("external_gene_name"),
# filters="external_gene_name",
# values = hpa_human.genes,
# mart = human,
# attributesL = c("external_gene_name"), martL = mouse)
assay.data <- as.data.frame(assay(ref.data))
assay.data$Gene.name <- rownames(assay.data)
assay.mdata <- merge(assay.data, human_to_mouse.df, by="Gene.name")
assay.mdata$Gene.name <- NULL
assay.mdata <- assay.mdata[!duplicated(assay.mdata$Gene.name.1),]
assay.mdata <- tibble::remove_rownames(assay.mdata)
assay.mdata <- tibble::column_to_rownames(assay.mdata, "Gene.name.1")
# Lose about 3k marker genes out of 19k
dim(assay.mdata)
dim(assay.data)
# over-write ref data
h2m_ref.data <- SummarizedExperiment(assays=SimpleList(logcounts=as.matrix(assay.mdata)),
rowData=rownames(assay.mdata),
colData=colData(ref.data),
metadata=metadata(ref.data))
ref.data.list[["HPCA"]] <- h2m_ref.data
}
if ("hpca" %in% params$anno_db) {
ref.data <- celldex::HumanPrimaryCellAtlasData(ensembl=FALSE)
hpa_human.genes <- rownames(assay(ref.data))
assay.data <- as.data.frame(assay(ref.data))
assay.data$Gene.name <- rownames(assay.data)
assay.data$Gene.name <- stringr::str_to_sentence(assay.data$Gene.name)
rownames(assay.data) <- assay.data$Gene.name
assay.data$Gene.name <- NULL
# overwrite reference data
h2m_ref.data <- SummarizedExperiment(assays=SimpleList(logcounts=as.matrix(assay.data)),
rowData=rownames(assay.data),
colData=colData(ref.data),
metadata=metadata(ref.data))
ref.data.list[["hpca"]] <- h2m_ref.data
}
if ("ImmGen" %in% params$anno_db) {
ref.data.list[["ImmGen"]] <- celldex::ImmGenData(ensembl=FALSE)
}
}
SingleR annotation
The differential expression method of Wilcoxon is used for training the classifier, which often provides a better annotation over the classic method. See links below. Spearman’s correlation is used for classification.
The Single R Book
SingleR marker detection
cells.list <- BiocParallel::bplapply(ref.data.list,
function(ref) SingleR(test=seurat@assays[["RNA"]]@data,
ref=ref,
labels=ref$label.main,
fine.tune = TRUE,
de.method="wilcox",
de.n=30,
BPPARAM=bpparam))
### Add to seurat
for (ann in names(cells.list)) {
ann_name <- paste0("SingleR_", ann)
ann_name_pruned <- paste0(ann_name, "_pruned")
seurat@meta.data[[ann_name]] <- cells.list[[ann]]$labels
seurat@meta.data[[ann_name_pruned]] <- cells.list[[ann]]$pruned.labels
}
num_ann <- length(cells.list)
nrow <- round((num_ann+.1)/2, digits=0)
fh <- nrow*6
Assess annotation
Heatmaps
Cells assigned to a particular cell type should have high scores. Here we look at 500 random cells.
rand_cells <- runif(500, min=1, max=500)
plot_shmap <- function(anno_db, cells) {
p <- plotScoreHeatmap(cells,
cells.use=rand_cells,
silent=TRUE,
treeheight_row=0,
fontsize=9,
main=anno_db)
return(p[[4]])
}
shmap.plots <- sapply(names(cells.list),
function(a) plot_shmap(anno_db=a,
cells=cells.list[[a]]),
simplify=FALSE, USE.NAMES=TRUE)
cowplot::plot_grid(plotlist=shmap.plots,
align="vh",
labels="AUTO",
label_size=12,
ncol=2,
nrow=nrow)
Deviation from assignment scores (delta)
Some cell types, such as monocytes, macrophages and dendritic cells may be especially difficult to predict.
plot_delta <- function(anno_db, cells) {
p <- plotDeltaDistribution(cells,
ncol=5,
size=0.5) +
theme_sara()+ theme(strip.text = element_text(size=9),
legend.position="bottom",
axis.text.x=element_blank())+
labs(y="Delta score", x="", title=anno_db)
return(p)
}
delta.plots <- sapply(names(cells.list),
function(a) plot_delta(anno_db=a,
cells=cells.list[[a]]),
simplify=FALSE, USE.NAMES=TRUE)
cowplot::plot_grid(plotlist=delta.plots,
align="vh",
labels="AUTO",
label_size=12,
ncol=2,
nrow=nrow)
pa <- params$pref_ann
pref_ann <- paste0("SingleR_", pa)
prune <- pruneScores(cells.list[[pa]],
nmads = 3,
min.diff.med = -Inf,
min.diff.next = 0,
get.thresholds = FALSE)
prune <- which(prune=="TRUE")
keep <- cells.list[[pa]][-prune,]
seurat <- seurat[,-prune]
SingleR_names <- paste0("SingleR_", names(cells.list))
names(SingleR_names) <- SingleR_names
collapse_other <- function(seurat, ref_label, freq.ll, cells.list) {
seurat@meta.data[[ref_label]] <- factor(seurat@meta.data[[ref_label]])
freq<-data.frame(janitor::tabyl(seurat@meta.data[[ref_label]], sort = TRUE))
colnames(freq)<-c("cells","n","proportion")
freq$percent<-freq$proportion*100
freq<-freq[order(freq$n, decreasing = TRUE),]
others<-as.vector(freq$cells[which(freq$n<freq.ll)])
seurat@meta.data[[ref_label]] <- forcats::fct_collapse(seurat@meta.data[[ref_label]],
Others=c(others))
return(seurat@meta.data[[ref_label]])
}
for (a in SingleR_names) {
seurat@meta.data[[a]] <- collapse_other(seurat,
ref_label=a,
freq.ll=params$ann_collapse_n,
cells.list=cells.list)
}
if ("HPCA" %in% params$anno_db) {
seurat@meta.data$SingleR_HPCA <- as.factor(seurat@meta.data$SingleR_HPCA)
seurat@meta.data$SingleR_HPCA <- forcats::fct_collapse(seurat@meta.data$SingleR_HPCA,
B_cell=c("B_cell", "Pro-B_cell_CD34+", "Pre-B_cell_CD34-"),
Others=c("Others", "Embryonic_stem_cells", "GMP",
"Hepatocytes", "iPS_cells", "Keratinocytes",
"MSC", "Neuroepithelial_cell", "Neurons"),
HSC=c("HSC_-G-CSF", "HSC_CD34+"),
Fibroblasts=c("Fibroblasts", "Chondrocytes",
"Smooth_muscle_cells",
"Tissue_stem_cells"),
Myelocyte=c("Myelocyte", "Pro-Myelocyte"))
}
if ("hpca" %in% params$anno_db) {
seurat@meta.data$SingleR_hpca <- as.factor(seurat@meta.data$SingleR_hpca)
seurat@meta.data$SingleR_hpca <- forcats::fct_collapse(seurat@meta.data$SingleR_hpca,
B_cell=c("B_cell", "Pro-B_cell_CD34+", "Pre-B_cell_CD34-"),
Others=c("Others", "Embryonic_stem_cells", "GMP",
"Hepatocytes", "iPS_cells", "Keratinocytes",
"MSC", "Neuroepithelial_cell", "Neurons"),
HSC=c("HSC_-G-CSF", "HSC_CD34+"),
Fibroblasts=c("Fibroblasts", "Chondrocytes",
"Smooth_muscle_cells",
"Tissue_stem_cells"),
Myelocyte=c("Myelocyte", "Pro-Myelocyte"))
}
if ("ImmGen" %in% params$anno_db) {
seurat@meta.data$SingleR_ImmGen <- as.character(seurat@meta.data$SingleR_ImmGen)
seurat@meta.data$SingleR_ImmGen <- if_else(seurat@meta.data$SingleR_ImmGen=="B cells, pro",
"B cells",
seurat@meta.data$SingleR_ImmGen)
}
Pie chart
Frequency of different cell types
dat <- as.data.table(seurat@meta.data)
ann <- paste0("SingleR_", params$pref_ann)
dt <- dat[, .N, by=ann]
dt <- dt[order(dt[[ann]]),]
if (ann %chin% names(ref.cols)) {
pref_ann.cols <- ref.cols[[ann]]
} else {
pref_ann.cols <- colorRampPalette(pals::watlington(n=16))(nrow(dt))
names(pref_ann.cols) <- dt[[ann]]
}
p <- ggplot(dt, aes(x="", y=N, fill=.data[[ann]]))
p <- p + geom_bar(stat="identity", width=1, color="white")
p <- p + coord_polar("y", start=0)
p <- p + scale_fill_manual(values=pref_ann.cols)
p <- p + theme_void()
p
PCA
seurat <- RunPCA(seurat)
Elbow plot
The elbow plot gives us an idea of how many PCs will be significant. When the plot plateaus, that is the point of diminishing returns.
dat <- data.frame(`Standard_deviation`=seurat@reductions[["pca"]]@stdev,
PC=c(1:length(seurat@reductions[["pca"]]@stdev)))
p <- ggplot(data=dat, aes(x=PC, y=Standard_deviation))
p <- p + geom_bar(stat="identity", fill="#6AAAB7", color=NA)
p <- p + theme_sara()
p <- p + labs(y="Standard deviation")
p
Dimension loadings
The dimension loadings give us an idea of the top genes contributing to each PC.
plot.list <- VizDimLoadings(seurat, dims=1:9, ncol=3, combine=FALSE, col=NA)
for (i in 1:length(plot.list)) {
pc <- paste("PC", i, sep="_")
#scale_lim <- max(a)
p <- plot.list[[i]]
xscale_lim <- max(abs(p$data[[pc]]))
p <- p + geom_bar(stat="identity", aes(fill=.data[[pc]]))
p<- p + scale_fill_gradient2(low=unikn_blue_red.cols[1],
mid="white",
high=unikn_blue_red.cols[3],
limits=c(-xscale_lim, xscale_lim))
p <- p + theme_sara()
p <- p + theme(axis.title = element_blank(),
axis.text.x=element_text(angle=90))
plot.list[[i]] <- p
}
cowplot::plot_grid(plotlist=plot.list,
align="vh",
labels="AUTO",
label_size=12,
ncol=3)
Clustering & UMAP
seurat <- RunUMAP(seurat, dims = 1:30, reduction.key="UMAP_")
seurat <- FindNeighbors(seurat, dims = 1:30)
# To-do parallelize, Seurat parallelization is not good.
for (alg in params$cluster_algorithm) {
if (alg=="Louvain2") {
seurat <- FindClusters(seurat, algorithm=2,
resolution=params$cluster_resolution,
verbose=FALSE)
names(seurat@meta.data) <- gsub(std_prefix, "Louvain2_", names(seurat@meta.data))
}
if (alg=="Leiden") {
seurat <- FindClusters(seurat, algorithm = 4,
resolution=params$cluster_resolution,
method="igraph",
verbose=FALSE)
names(seurat@meta.data) <- gsub(std_prefix, "Leiden_", names(seurat@meta.data))
}
if (alg=="Louvain") {
seurat <- FindClusters(seurat, algorithm=1,
resolution=params$cluster_resolution,
verbose=FALSE)
names(seurat@meta.data) <- gsub(std_prefix, "Louvain_", names(seurat@meta.data))
}
if (alg=="SLM") {
seurat <- FindClusters(seurat, algorithm=3,
resolution=params$cluster_resolution,
verbose=FALSE)
names(seurat@meta.data) <- gsub(std_prefix, "SLM_", names(seurat@meta.data))
}
}
saveRDS(seurat, file=final_seurat)
QC of clustering and annotation
How well do clusters agree with annotation?
We calculate the adjusted rand index (ARI) for clusters vs annotations, which ranges from 0 to 1. Larger values indicate a better fit. Ideally, we would hope for an ARI above at least 0.5, although this can vary with the single-cell data and availability of suitable references.
cluster.names <- names(seurat@meta.data)[grep("Leiden|Louvain", names(seurat@meta.data))]
names(cluster.names) <- cluster.names
ARI.list <- lapply(cluster.names,
function(c) lapply(SingleR_names,
function(a) mclust::adjustedRandIndex(seurat@meta.data[[a]],
seurat@meta.data[[c]])))
ARI.list <- unlist(ARI.list, recursive=FALSE)
ARI.dt <- data.table(ID=names(ARI.list),
ARI=unlist(ARI.list))
ARI.dt <- ARI.dt[, c("Cluster") := tstrsplit(ID, "_", keep=1)]
ARI.dt <- ARI.dt[, c("Resolution", "Annotation") := tstrsplit(ID, "_", keep=c(2,3))]
ARI.dt$Resolution <- gsub(".SingleR", "", ARI.dt$Resolution)
ARI.dt$Annotation <- paste0("SingleR_", ARI.dt$Annotation)
ARI.dt$cluster_res <- paste(ARI.dt$Cluster, ARI.dt$Resolution, sep="_")
num_ann <- length(levels(as.factor(ARI.dt$Annotation)))
fw <- (num_ann*2.5)+1
nr <- round((num_ann+.1)/2, digits=0)
fh <- (nr*2.5)+.5
# karpfenblau2 "#B4BCD6", "#8290BB", "#586BA4", "#3E5496","#324376
res.cols <- colorRampPalette(pal_karpfenblau)(length(params$cluster_resolution))
names(res.cols) <- levels(as.character(params$cluster_resolution))
p1 <- ggplot(data=ARI.dt, aes(x=Cluster, y=ARI, fill=as.character(Resolution)))
p1 <- p1 + geom_bar(stat="identity", color="white", position=position_dodge(preserve="total")) + scale_fill_manual(name="Resolution", values=res.cols)
p1 <- p1 + ylim(0,1)
p1 <- p1 + facet_wrap(~Annotation, scale="free_x", nrow=nr)
p1 <- p1 + theme_sara() + theme(strip.text = element_text(size=11))
p1
# Pick top ARI regardless of preferred annotation
setorder(ARI.dt, -ARI)
ARI.dt2 <- ARI.dt[, c("Annotation", "Cluster", "Resolution", "ARI"), with=FALSE]
ARI.dt2$ARI <- round(ARI.dt2$ARI, digits=4)
kable(ARI.dt2, booktabs = TRUE) %>%
kable_styling(font_size = 12)
| Annotation | Cluster | Resolution | ARI |
|---|---|---|---|
| SingleR_ImmGen | Louvain2 | 0.2 | 0.2416 |
| SingleR_ImmGen | Louvain2 | 0.15 | 0.2407 |
| SingleR_hpca | Louvain2 | 0.15 | 0.2370 |
| SingleR_hpca | Louvain2 | 0.2 | 0.2329 |
| SingleR_ImmGen | Louvain2 | 0.5 | 0.1369 |
| SingleR_hpca | Louvain2 | 0.5 | 0.1066 |
| SingleR_ImmGen | Louvain2 | 0.8 | 0.0703 |
| SingleR_ImmGen | Louvain2 | 1.1 | 0.0558 |
| SingleR_hpca | Louvain2 | 0.8 | 0.0554 |
| SingleR_hpca | Louvain2 | 1.1 | 0.0446 |
| SingleR_ImmGen | Louvain2 | 1.4 | 0.0425 |
| SingleR_hpca | Louvain2 | 1.4 | 0.0310 |
top_ann <- ARI.dt$Annotation[1]
top_clust_res <- ARI.dt$cluster_res[1]
if (top_ann != pref_ann) {
pref.ARI.dt <- ARI.dt[Annotation==pref_ann]
setorder(pref.ARI.dt, -ARI)
pref_clust_res <- pref.ARI.dt$cluster_res[1]
poor_choice=TRUE
} else {
pref_clust_res <- top_clust_res
poor_choice=FALSE
}
Comparison of UMAP by cluster vs annotation
UMAP clusters
Inspecting the UMAP This will give us an idea of how many clusters and how well separated. Look for the cells to stay within their cluster.
top_clust.cols <- colorRampPalette(carto_pal(n=12, "Vivid"))(length(levels(as.factor(seurat@meta.data[[top_clust_res]]))))
names(top_clust.cols) <- levels(as.factor(seurat@meta.data[[top_clust_res]]))
p <- DimPlot(seurat, group.by=top_clust_res)
p <- p + scale_color_manual(values=top_clust.cols)
p <- p + theme_sara()
p
UMAPs
Look for cells to be clustered well by cell type.
Top annotation
A UMAP of top annotation shows how well the clusters and annotation match up.
if (top_ann %chin% names(ref.cols)) {
top_ann.cols <- ref.cols[[top_ann]]
} else {
top_ann.cols <- colorRampPalette(pals::alphabet2(n=26))(nrow(dt))
names(top_ann.cols) <- levels(as.factor(seurat@meta.data[[top_ann]]))
}
p <- DimPlot(seurat, group.by=top_ann)
p <- p + scale_color_manual(values=top_ann.cols)
p <- p + theme_sara()
p
Preferred annotation
If the top annotation based on ARI is not the preferred annotation, the preferred annotation is plotted here.
p <- DimPlot(seurat, group.by=pref_ann)
p <- p + scale_color_manual(values=pref_ann.cols)
p <- p + theme_sara()
p <- p + theme(plot.title=element_blank())
p
Barplots
The annotation barplot is another visualization of cluster/annotation agreement. Bars should be mostly one (or two) kinds of cells, although this depends on the granularity of the annotation.
Top annotation
Barplot of best-scoring annotation according to ARI.
dat <- seurat@meta.data
p <- ggplot(data=dat, aes(x=.data[[top_clust_res]], fill=.data[[top_ann]]))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=top_ann.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
axis.text=element_text(size=9),
legend.text=element_text(size=9),
legend.title=element_text(size=9),
axis.title.y=element_text(size=10),
strip.text=element_text(size=10),
legend.position="bottom")
p
Preferred annotation
The preferred annotation is show here, with it’s best performing clustering, if it is different from top.
dat <- seurat@meta.data
p <- ggplot(data=dat, aes(x=.data[[pref_clust_res]], fill=.data[[pref_ann]]))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=pref_ann.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
axis.text=element_text(size=9),
legend.text=element_text(size=9),
legend.title=element_text(size=9),
axis.title.y=element_text(size=10),
strip.text=element_text(size=10),
legend.position="bottom")
p
Cluster markers
If the cluster and annotations don’t match up well, it can be helpful to look at the markers that differentiate the clusters. Cell cycle markers, for example, may be problematic.
Idents(seurat) <- seurat@meta.data[[top_clust_res]]
markers <- FindAllMarkers(seurat,
only.pos = TRUE,
min.pct = 0.3,
logfc.threshold = .5,
max.cells.per.ident=5000,
random.seed = 888)
markers %>%
group_by(cluster) %>%
top_n(n = 5, wt = avg_log2FC) -> top5
marker.genes <- top5$gene
seurat.hm <- subset(seurat, downsample=1000)
mat<- seurat.hm[["RNA"]]@data[marker.genes, ] %>% as.matrix()
mat<- log2(mat+1)
#mat <- seurat.hm[["RNA"]]@scale.data[marker.genes,] %>% as.matrix() # use this and dont log to get DoHeatmap style
# cell colors
cell_anno<- seurat.hm@meta.data[[pref_ann]]
cell.levels <- levels(droplevels(as.factor(cell_anno)))
c.cols <- pref_ann.cols[names(pref_ann.cols) %in% cell.levels]
# cluster colors
clust_anno <- as.character(seurat.hm@meta.data[[top_clust_res]])
clust_levels <- levels((droplevels(as.factor(clust_anno))))
clust.cols <- colorRampPalette(carto_pal(n=12, name="Vivid"))(length(clust_levels))
names(clust.cols) <- clust_levels
# cluster rowcolors
clust_anno2 <- as.character(top5$cluster)
clust_levels <- levels((droplevels(as.factor(clust_anno2))))
clust.cols2 <- clust.cols
ta <- ComplexHeatmap::HeatmapAnnotation(`Cluster`=clust_anno,
`Cell type`=cell_anno,
col=list(`Cell type`=c.cols,
`Cluster`=clust.cols))
ra <- ComplexHeatmap::rowAnnotation(`Cluster`=clust_anno2,
col=list(`Cluster`=clust.cols2),
show_legend=FALSE)
heatmap2.colfun <- microViz::heat_palette(palette = "Cividis",
breaks = 100,
range = c(0,max(mat)),
sym = FALSE,
rev = FALSE)
ComplexHeatmap::Heatmap(mat, name = "Expression",
cluster_columns = FALSE,
cluster_rows=FALSE,
column_split=seurat.hm@meta.data[[top_clust_res]],
row_split=top5$cluster,
row_names_gp = grid::gpar(fontsize = 11),
column_title=character(0),
column_gap = unit(0.5, "mm"),
col = heatmap2.colfun,
top_annotation = ta,
show_column_names = FALSE,
left_annotation = ra)
Clusters by sample
This may be biologically ok, as long as the clusters also do not have high percent.mt, and/or low or high numbers of genes or UMIs
UMAP
Inspect the UMAP, does it look like some samples dominate clusters?
p <- DimPlot(seurat, group.by="SampleID")
p <- p + scale_color_manual(values=sample.cols)
p <- p + theme_sara()
p
Heatmap
The heatmap provides an alternative means of examining cluster/sample assignment.
tab<-prop.table(table(seurat@meta.data$SampleID, seurat@meta.data[[top_clust_res]]),2)
nr <- length(levels(as.factor(seurat@meta.data$SampleID)))
nc <- length(levels(as.factor(seurat@meta.data[[top_clust_res]])))
ComplexHeatmap::Heatmap(tab,
cluster_columns = TRUE,
show_column_dend = TRUE,
row_names_gp = grid::gpar(fontsize = 12),
column_names_gp = grid::gpar(fontsize=12),
cluster_rows = TRUE,
show_row_dend = FALSE,
col = heatmap.colfun,
height = unit(5, "mm")*nr,
width = unit(5, "mm")*nc,
name="Proportion")
Annotations by sample
We may have expectations about the expected proportion of cells in the dataset for certain cell types. Of course, we could be wrong, which might be fun, or not.
Top annotation
dat <- seurat@meta.data
p <- ggplot(data=dat, aes(x=SampleID, fill=.data[[top_ann]]))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=top_ann.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
axis.text=element_text(size=9),
legend.text=element_text(size=9),
legend.title=element_text(size=9),
axis.title.y=element_text(size=10),
strip.text=element_text(size=10),
legend.position="bottom")
p
Preferred annotation
If the top annotation based on ARI is not the preferred annotation, the preferred annotation is plotted here.
dat <- seurat@meta.data
p <- ggplot(data=dat, aes(x=SampleID, fill=.data[[pref_ann]]))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=pref_ann.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
axis.text=element_text(size=9),
legend.text=element_text(size=9),
legend.title=element_text(size=9),
axis.title.y=element_text(size=10),
strip.text=element_text(size=10),
legend.position="bottom")
p
Is there an effect of cell cycle?
There may be some structure in the UMAP due to cell cycle, it is also a good idea to check the PCs and cluster markers.
p <- DimPlot(seurat, group.by="Phase")
p <- p + scale_color_manual(values=cell_cycle.cols)
p <- p + theme_sara()
p
Cell cycle heatmaps by cluster and annotation
If cell cycle is evaluated, the results will be plotted here.
Cluster
tab<-prop.table(table(seurat@meta.data$Phase, seurat@meta.data[[top_clust_res]]),2)
nr <- length(levels(as.factor(seurat@meta.data$Phase)))
nc <- length(levels(as.factor(seurat@meta.data[[top_clust_res]])))
ComplexHeatmap::Heatmap(tab,
cluster_columns = TRUE,
show_column_dend = TRUE,
row_names_gp = grid::gpar(fontsize = 12),
column_names_gp = grid::gpar(fontsize=12),
cluster_rows = TRUE,
show_row_dend = FALSE,
col = heatmap.colfun,
height = unit(5, "mm")*nr,
width = unit(5, "mm")*nc,
name="Proportion")
Annotation
tab<-prop.table(table(seurat@meta.data$Phase, seurat@meta.data[[top_ann]]),2)
nr <- length(levels(as.factor(seurat@meta.data$Phase)))
nc <- length(levels(as.factor(seurat@meta.data[[top_ann]])))
ComplexHeatmap::Heatmap(tab,
cluster_columns = TRUE,
show_column_dend = TRUE,
row_names_gp = grid::gpar(fontsize = 12),
column_names_gp = grid::gpar(fontsize=12),
cluster_rows = TRUE,
show_row_dend = FALSE,
col = heatmap.colfun,
height = unit(5, "mm")*nr,
width = unit(5, "mm")*nc,
name="Proportion")
Cell metrics effects
First we will look at the UMAP for visual inspection. Extremely low or high UMI/genes in a cluster or high percent.mt in a cluster may be problematic.
feature_plot <- function(seurat, feature) {
p <- ggplot(data=as.data.frame(seurat@reductions$umap@cell.embeddings),
aes(x=UMAP_1, y=UMAP_2,
color=seurat@meta.data[[feature]]))
p <- p + geom_point(size=0.1)
p <- p + theme_sara()
if(feature == "percent.mt") {
p <- p + scale_color_viridis_c(option="rocket",
direction=-1)
} else {
p <- p + scale_color_viridis_c(option="rocket",
direction=-1,
labels=scales::label_scientific())
}
p <- p + theme(axis.title.y.right = element_blank(),
legend.position="right",
legend.title=element_blank())
p <- p + labs(title=feature)
return(p)
}
p.list <- sapply(names(features),
function(feature) feature_plot(seurat, feature),
simplify=FALSE, USE.NAMES=TRUE)
cowplot::plot_grid(plotlist=p.list,
align="vh",
labels="AUTO",
label_size=12,
ncol=2,
nrow=2)
Cell metrics ridgeplots
Clusters
It may be helpful to investigate further clusters that have multiple cell types.
p1 <- RidgePlot(seurat, group.by=top_clust_res, features="nFeature_RNA", same.y.lims=TRUE, sort=TRUE)
p1 <- p1 + scale_fill_manual(values=top_clust.cols)
p1 <- p1 + theme(legend.position="none",
axis.title.y=element_blank(),
axis.text.x=element_text(size=11))
p1 <- p1 + scale_x_continuous(labels = scales::scientific)
p2 <- RidgePlot(seurat, group.by=top_clust_res, features="nCount_RNA", same.y.lims=TRUE, sort=TRUE, log=TRUE)
p2 <- p2 + scale_fill_manual(values=top_clust.cols)
p2 <- p2 + theme(legend.position="none",
axis.text=element_text(size=11),
axis.title.y=element_blank())
p2 <- p2 + scale_x_continuous(labels = scales::scientific)
mt.ul <- max(seurat@meta.data$percent.mt)+5
p3 <- RidgePlot(seurat, group.by=top_clust_res, features="percent.mt", y.max=50, sort=TRUE)
p3 <- p3 + scale_fill_manual(values=top_clust.cols)
p3 <- p3 + xlim(0, mt.ul)
p3 <- p3 + theme(legend.position="none",
axis.title.y=element_blank(),
axis.text=element_text(size=11))
prow <- cowplot::plot_grid(p1,
p2,
p3,
align = 'vh',
labels = "AUTO",
hjust = -1,
nrow = 1)
prow
Annotation
p1 <- RidgePlot(seurat, group.by=top_ann, features="nFeature_RNA", same.y.lims=TRUE, sort=TRUE)
p1 <- p1 + scale_fill_manual(values=top_ann.cols)
p1 <- p1 + theme(legend.position="none", axis.title.y=element_blank())
p2 <- RidgePlot(seurat, group.by=top_ann, features="nCount_RNA", same.y.lims=TRUE, sort=TRUE, log=TRUE)
p2 <- p2 + scale_fill_manual(values=top_ann.cols)
p2 <- p2 + theme(legend.position="none", axis.title.y=element_blank())
p3 <- RidgePlot(seurat, group.by=top_ann, features="percent.mt", y.max=50, sort=TRUE)
p3 <- p3 + scale_fill_manual(values=top_ann.cols)
p3 <- p3 + theme(legend.position="none", axis.title.y=element_blank())
p3 <- p3 + xlim(0, percent.mt.ul+2)
prow <- cowplot::plot_grid(p1,
p2,
p3,
align = 'vh',
labels = "AUTO",
hjust = -1,
nrow = 1)
prow
Doublet exploration
Section below will only be evaluated if doublet detection occurred. It is recommended to run first with doublet detection, then with doublet filtering as needed. The hybrid approach from scds package is utilized for computational efficiency. Although scds provides a “call” based on the score, it may be more useful to view the distribution of the score. For visualization purposes, we will call anything with a score of 1 or greater a doublet.
UMAPs
Doublet score
p <- FeaturePlot(seurat, features="hybrid_score")
p <- p + scale_color_viridis_c(option="rocket", direction=-1)
p <- p + theme_sara()
p
Doublet call
- eDoublet refers to cells estimated to be a doublet with a hybrid score greater than or equal to 1
- eSingletrefers to cells estimated to be a singlet with a hybrid score less than 1
p <- DimPlot(seurat, group.by="hybrid_call")
p <- p + theme_sara()
p <- p + scale_color_manual(values=dbl.cols)
p <- p + theme(plot.title=element_blank())
p
Clusters
Assessing the clusters will help us determine if there are any clusters prone to doublets. Clusters with doublets are good targets for removal.
Ridgeplot
p4 <- RidgePlot(seurat, group.by=top_clust_res, features="hybrid_score", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=top_clust.cols)
p4
Heatmap
tab<-prop.table(table(seurat@meta.data$hybrid_call, seurat@meta.data[[top_clust_res]]),2)
nr <- length(levels(as.factor(seurat@meta.data$hybrid_call)))
nc <- length(levels(as.factor(seurat@meta.data[[top_clust_res]])))
ComplexHeatmap::Heatmap(tab,
cluster_columns = TRUE,
show_column_dend = TRUE,
row_names_gp = grid::gpar(fontsize = 12),
column_names_gp = grid::gpar(fontsize=12),
cluster_rows = TRUE,
show_row_dend = FALSE,
col = heatmap.colfun,
height = unit(5, "mm")*nr,
width = unit(5, "mm")*nc,
name="Proportion")
Density
p <- ggplot(data=seurat@meta.data, aes(x=hybrid_score, y=nCount_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[top_clust_res]])
p
Doublets and annotation
Ridgeplot
p4 <- RidgePlot(seurat, group.by=pref_ann, features="hybrid_score", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=pref_ann.cols)
p4
Heatmap
tab<-prop.table(table(seurat@meta.data$hybrid_call, seurat@meta.data[[pref_ann]]),2)
nr <- length(levels(as.factor(seurat@meta.data$hybrid_call)))
nc <- length(levels(as.factor(seurat@meta.data[[pref_ann]])))
ComplexHeatmap::Heatmap(tab,
cluster_columns = TRUE,
show_column_dend = TRUE,
row_names_gp = grid::gpar(fontsize = 12),
column_names_gp = grid::gpar(fontsize=12),
cluster_rows = TRUE,
show_row_dend = FALSE,
col = heatmap.colfun,
height = unit(5, "mm")*nr,
width = unit(5, "mm")*nc,
name="Proportion")
Density: Annotation - nCount
p <- ggplot(data=seurat@meta.data, aes(x=hybrid_score, y=nCount_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p
Density: Annotation - nFeature
p <- ggplot(data=seurat@meta.data, aes(x=hybrid_score, y=nFeature_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p
Density: Annotation - percent.mt
p <- ggplot(data=seurat@meta.data, aes(x=hybrid_score, y=percent.mt))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p
Cell cycle
If cycle cycle is evaluated, results will be plotted here.
Ridgeplot
p4 <- RidgePlot(seurat, group.by="Phase", features="hybrid_score", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=cell_cycle.cols)
p4
Boxplots
p <- ggplot(data=seurat@meta.data, aes(x=Phase, y=hybrid_score, fill=Phase))
p <- p + geom_boxplot()
p <- p + scale_fill_manual(values=cell_cycle.cols)
p <- p + facet_wrap(~.data[[pref_ann]])
p
Decontamination exploration
Section below will only be evaluated if decontamination detection occurred. It is recommended to run first with decontamination detection, then with decontamination filtering as needed.
UMAPs
Decontamination percent
p <- FeaturePlot(seurat, features="decontX_contamination")
p <- p + scale_color_viridis_c(option="rocket", direction=-1)
p <- p + theme_sara()
p
Decontamination threshold
A threshold of 0.6 was utilized.
p <- DimPlot(seurat, group.by="contam_status")
p <- p + theme_sara()
p <- p + scale_color_manual(values=dcx.cols)
p <- p + theme(plot.title=element_blank())
p
Clusters
Assessing the clusters will help us determine if there are any clusters prone to ambient RNA contamination
Ridgeplot
p4 <- RidgePlot(seurat, group.by=top_clust_res, features="decontX_contamination", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=top_clust.cols)
p4
Heatmap
tab<-prop.table(table(seurat@meta.data$contam_status, seurat@meta.data[[top_clust_res]]),2)
nr <- length(levels(as.factor(seurat@meta.data$contam_status)))
nc <- length(levels(as.factor(seurat@meta.data[[top_clust_res]])))
ComplexHeatmap::Heatmap(tab,
cluster_columns = TRUE,
show_column_dend = TRUE,
row_names_gp = grid::gpar(fontsize = 12),
column_names_gp = grid::gpar(fontsize=12),
cluster_rows = TRUE,
show_row_dend = FALSE,
col = heatmap.colfun,
height = unit(5, "mm")*nr,
width = unit(5, "mm")*nc,
name="Proportion")
Density
p <- ggplot(data=seurat@meta.data, aes(x=decontX_contamination, y=nCount_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[top_clust_res]])
p
Contamination and annotation
Ridgeplot
p4 <- RidgePlot(seurat, group.by=pref_ann, features="decontX_contamination", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=pref_ann.cols)
p4
Heatmap
tab<-prop.table(table(seurat@meta.data$contam_status, seurat@meta.data[[pref_ann]]),2)
nr <- length(levels(as.factor(seurat@meta.data$contam_status)))
nc <- length(levels(as.factor(seurat@meta.data[[pref_ann]])))
ComplexHeatmap::Heatmap(tab,
cluster_columns = TRUE,
show_column_dend = TRUE,
row_names_gp = grid::gpar(fontsize = 12),
column_names_gp = grid::gpar(fontsize=12),
cluster_rows = TRUE,
show_row_dend = FALSE,
col = heatmap.colfun,
height = unit(5, "mm")*nr,
width = unit(5, "mm")*nc,
name="Proportion")
Density: Annotation - nCount
p <- ggplot(data=seurat@meta.data, aes(x=decontX_contamination, y=nCount_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p
Density: Annotation - nFeature
p <- ggplot(data=seurat@meta.data, aes(x=decontX_contamination, y=nFeature_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p
Density: Annotation - percent.mt
p <- ggplot(data=seurat@meta.data, aes(x=decontX_contamination, y=percent.mt))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p
Cell cycle
If cycle cycle is evaluated, results will be plotted here.
Ridgeplot
p4 <- RidgePlot(seurat, group.by="Phase", features="decontX_contamination", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=cell_cycle.cols)
p4
Boxplots
p <- ggplot(data=seurat@meta.data, aes(x=Phase, y=decontX_contamination, fill=Phase))
p <- p + geom_boxplot()
p <- p + scale_fill_manual(values=cell_cycle.cols)
p <- p + facet_wrap(~.data[[pref_ann]])
p
Covariate analysis
If experiment metadata was provided, it will be utilized here. Median PC loading for all cells per sample will be calculated for the first 15 PCs. An ANOVA (Kruskal-Wallis rank sum test) will be performed with categorical variables and a glm will be performed on all numeric variables. Nominal p-values are shown.
metadata.df <- as.data.frame(data.table::fread(params$exp_metadata))
metadata.df$SampleID <- metadata.df[[params$sample_id]]
names(metadata.df) <- janitor::make_clean_names(names(metadata.df))
setnames(metadata.df, "sample_id", "SampleID")
meta_var <- names(metadata.df)[!names(metadata.df) %chin% c(params$sample_id, "SampleID")]
# For testing
#cluster_algs <- c("Louvain")
#top_clust_res <- "Louvain2_0.15"
#pref_ann <- "SingleR_ImmGen"
# For use
cluster_algs <- params$cluster_algorithm
seur.meta <- as.data.frame(seurat@meta.data)
seur.meta$Clusters <- seur.meta[top_clust_res]
seur_var <- names(seur.meta)
seur_var <- seur_var[grep("ident|pruned|rmv|Filter|seurat_clusters", seur_var, invert=TRUE)]
seur_var <- seur_var[!seur_var %like% cluster_algs]
covariates <- c(meta_var, seur_var)
seur.meta <- seur.meta %>% dplyr::select(dplyr::all_of(seur_var))
setnames(seur.meta, c("S.Score", "G2M.Score"), c("s_score", "g2m_score"))
all.meta <- merge(seur.meta, metadata.df, by="SampleID")
all.meta$rn <- rownames(seur.meta)
covariate_PCA_analysis <- function(seurat, metadata, numPCs, covariates) {
covariate.types <- sapply(metadata, class)
covariate.types <- gsub("character", "factor", covariate.types)
covariate.types <- covariate.types[names(covariate.types) %chin% covariates]
cat_var <- covariate.types[grep("factor", covariate.types)]
cat_var <- cat_var[grep("rn", names(cat_var), invert=TRUE)]
num_var <- covariate.types[grep("factor", covariate.types, invert=TRUE)]
pca.df <- as.data.frame(seurat@reductions$pca@cell.embeddings)[,1:numPCs]
pca.df$rn <- rownames(pca.df)
sample.df <- metadata[, c("rn", "SampleID")]
pca.df <- merge(pca.df, sample.df, by="rn", all.x=TRUE, all.y=FALSE)
pca.df$rn <- NULL
median.df <- pca.df %>%
dplyr::group_by(SampleID) %>%
summarize_all(list(median), na.rm=TRUE)
pca_meta.df <- merge(median.df, metadata, by="SampleID", all.x=TRUE, all.y=FALSE)
pca_meta.df <- pca_meta.df[!duplicated(pca_meta.df$SampleID),]
endCol <- length(names(pca_meta.df))
ann.df <- pca_meta.df[,1:numPCs+1]
anova_p.list <- sapply(names(cat_var),
function(f) apply(ann.df,
2,
function(x) kruskal.test(x ~ pca_meta.df[[f]])$p.value),
simplify=FALSE, USE.NAMES = TRUE)
anova_res.pval <- plyr::ldply(anova_p.list, .id="Factor")
glm_p.list <- sapply(names(num_var),
function(f) apply(ann.df,
2,
function(x) summary(glm(x ~ as.numeric(pca_meta.df[[f]])))$coefficients[2,4]),
simplify=FALSE, USE.NAMES = TRUE)
glm_res.pval <- plyr::ldply(glm_p.list, .id="Factor")
results <- rbindlist(list(glm_res.pval, anova_res.pval))
return(results)
}
covariate.results <- covariate_PCA_analysis(seurat=seurat,
metadata=all.meta,
numPCs=15,
covariates=meta_var)
cov.melt <- data.table::melt(covariate.results,
id.vars=c("Factor"),
variable.name="PC",
value="pvalue")
cov.melt$PC <- gsub("_", "", cov.melt$PC)
cov.melt$PC_x <- gsub("PC", "", cov.melt$PC)
cov.melt$PC_x <- as.numeric(cov.melt$PC_x)
cov.melt$Significance <- ifelse(cov.melt$pvalue <= 0.05, "SIG", NA)
p <- ggplot(cov.melt, aes(x=PC_x, y=Factor, fill=-log10(pvalue), color=Significance))
p <- p + geom_tile(color="white")
p <- p + geom_point(pch=8, size=2)
p <- p + scale_fill_viridis_c(option="rocket", direction=-1)
p <- p + scale_color_manual(values=c(`SIG`="yellow"), na.value=NA)
p <- p + scale_x_discrete(breaks=c(seq(1,15)), limits=c(seq(1,15)))
p <- p + theme_minimal()
p <- p + theme(axis.title=element_blank(),
axis.text=element_text(color="black", size=11))
p
end <- Sys.time()
That’s it! Spend some time looking through the report to determine next steps.
Analysis started: 2022-04-05 04:18:01
Analysis finished: 2022-04-05 07:20:19
R Session Information
Information about R, the OS and attached or loaded packages
pander::pander(sessionInfo(), compact = FALSE)
R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=C.UTF-8, LC_NUMERIC=C, LC_TIME=C.UTF-8, LC_COLLATE=C.UTF-8, LC_MONETARY=C.UTF-8, LC_MESSAGES=C.UTF-8, LC_PAPER=C.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=C.UTF-8 and LC_IDENTIFICATION=C
attached base packages:
- grid
- stats4
- stats
- graphics
- grDevices
- utils
- datasets
- methods
- base
other attached packages:
- microViz(v.0.9.0)
- ComplexHeatmap(v.2.10.0)
- SeuratObject(v.4.0.4)
- Seurat(v.4.1.0)
- celda(v.1.10.0)
- ggpointdensity(v.0.1.0)
- nord(v.1.0.0)
- unikn(v.0.4.0)
- mclust(v.5.4.9)
- ggridges(v.0.5.3)
- scales(v.1.1.1)
- scds(v.1.10.0)
- kableExtra(v.1.3.4)
- rcartocolor(v.2.0.0)
- BiocParallel(v.1.28.3)
- multimode(v.1.5)
- scater(v.1.22.0)
- patchwork(v.1.1.1)
- glmGamPoi(v.1.6.0)
- ensembldb(v.2.18.3)
- AnnotationFilter(v.1.18.0)
- GenomicFeatures(v.1.46.4)
- AnnotationDbi(v.1.56.2)
- AnnotationHub(v.3.2.1)
- BiocFileCache(v.2.2.1)
- dbplyr(v.2.1.1)
- pals(v.1.7)
- forcats(v.0.5.1)
- janitor(v.2.1.0)
- dplyr(v.1.0.8)
- scran(v.1.22.1)
- scuttle(v.1.4.0)
- SingleCellExperiment(v.1.16.0)
- biomaRt(v.2.50.3)
- celldex(v.1.4.0)
- SingleR(v.1.8.1)
- SummarizedExperiment(v.1.24.0)
- Biobase(v.2.54.0)
- GenomicRanges(v.1.46.1)
- GenomeInfoDb(v.1.30.1)
- IRanges(v.2.28.0)
- S4Vectors(v.0.32.3)
- BiocGenerics(v.0.40.0)
- MatrixGenerics(v.1.6.0)
- matrixStats(v.0.61.0)
- plyr(v.1.8.6)
- ggplot2(v.3.3.5)
- data.table(v.1.14.2)
- rmarkdown(v.2.11)
loaded via a namespace (and not attached):
- rsvd(v.1.0.5)
- ica(v.1.0-2)
- svglite(v.2.1.0)
- assertive.properties(v.0.0-4)
- Rsamtools(v.2.10.0)
- foreach(v.1.5.2)
- lmtest(v.0.9-39)
- crayon(v.1.5.0)
- rhdf5filters(v.1.6.0)
- spatstat.core(v.2.4-0)
- MASS(v.7.3-55)
- backports(v.1.4.1)
- nlme(v.3.1-155)
- rmdformats(v.1.0.3)
- rlang(v.1.0.1)
- XVector(v.0.34.0)
- ROCR(v.1.0-11)
- irlba(v.2.3.5)
- limma(v.3.50.1)
- filelock(v.1.0.2)
- xgboost(v.1.5.2.1)
- rjson(v.0.2.21)
- bit64(v.4.0.5)
- glue(v.1.6.1)
- pheatmap(v.1.0.12)
- sctransform(v.0.3.3)
- parallel(v.4.1.2)
- vipor(v.0.4.5)
- spatstat.sparse(v.2.1-0)
- spatstat.geom(v.2.3-2)
- tidyselect(v.1.1.2)
- phyloseq(v.1.38.0)
- fitdistrplus(v.1.1-6)
- XML(v.3.99-0.8)
- tidyr(v.1.2.0)
- assertive.types(v.0.0-3)
- zoo(v.1.8-9)
- GenomicAlignments(v.1.30.0)
- xtable(v.1.8-4)
- magrittr(v.2.0.2)
- evaluate(v.0.15)
- cli(v.3.2.0)
- zlibbioc(v.1.40.0)
- dbscan(v.1.1-10)
- rstudioapi(v.0.13)
- miniUI(v.0.1.1.1)
- bslib(v.0.3.1)
- rpart(v.4.1.16)
- RcppEigen(v.0.3.3.9.1)
- maps(v.3.4.0)
- shiny(v.1.7.1)
- BiocSingular(v.1.10.0)
- xfun(v.0.29)
- clue(v.0.3-60)
- multtest(v.2.50.0)
- cluster(v.2.1.2)
- biomformat(v.1.22.0)
- KEGGREST(v.1.34.0)
- tibble(v.3.1.6)
- interactiveDisplayBase(v.1.32.0)
- ggrepel(v.0.9.1)
- ape(v.5.6-2)
- listenv(v.0.8.0)
- permute(v.0.9-7)
- Biostrings(v.2.62.0)
- png(v.0.1-7)
- future(v.1.24.0)
- withr(v.2.4.3)
- bitops(v.1.0-7)
- assertive.base(v.0.0-9)
- pracma(v.2.3.8)
- dqrng(v.0.3.0)
- pROC(v.1.18.0)
- pillar(v.1.7.0)
- GlobalOptions(v.0.1.2)
- cachem(v.1.0.6)
- GetoptLong(v.1.0.5)
- DelayedMatrixStats(v.1.16.0)
- vctrs(v.0.3.8)
- ellipsis(v.0.3.2)
- generics(v.0.1.2)
- tools(v.4.1.2)
- beeswarm(v.0.4.0)
- munsell(v.0.5.0)
- DelayedArray(v.0.20.0)
- fastmap(v.1.1.0)
- compiler(v.4.1.2)
- abind(v.1.4-5)
- httpuv(v.1.6.5)
- rtracklayer(v.1.54.0)
- ExperimentHub(v.2.2.1)
- plotly(v.4.10.0)
- GenomeInfoDbData(v.1.2.7)
- gridExtra(v.2.3)
- enrichR(v.3.0)
- edgeR(v.3.36.0)
- lattice(v.0.20-45)
- deldir(v.1.0-6)
- utf8(v.1.2.2)
- later(v.1.3.0)
- jsonlite(v.1.8.0)
- multipanelfigure(v.2.1.2)
- ScaledMatrix(v.1.2.0)
- pbapply(v.1.5-0)
- sparseMatrixStats(v.1.6.0)
- lazyeval(v.0.2.2)
- promises(v.1.2.0.1)
- doParallel(v.1.0.17)
- R.utils(v.2.11.0)
- goftest(v.1.2-3)
- spatstat.utils(v.2.3-0)
- reticulate(v.1.24)
- cowplot(v.1.1.1)
- statmod(v.1.4.36)
- webshot(v.0.5.2)
- Rtsne(v.0.15)
- pander(v.0.6.4)
- dichromat(v.2.0-0)
- uwot(v.0.1.11)
- igraph(v.1.2.11)
- survival(v.3.2-13)
- yaml(v.2.3.5)
- systemfonts(v.1.0.4)
- htmltools(v.0.5.2)
- memoise(v.2.0.1)
- BiocIO(v.1.4.0)
- locfit(v.1.5-9.5)
- viridisLite(v.0.4.0)
- digest(v.0.6.29)
- assertthat(v.0.2.1)
- mime(v.0.12)
- rappdirs(v.0.3.3)
- RSQLite(v.2.2.10)
- future.apply(v.1.8.1)
- mapproj(v.1.2.8)
- vegan(v.2.5-7)
- blob(v.1.2.2)
- R.oo(v.1.24.0)
- styler(v.1.6.2)
- labeling(v.0.4.2)
- splines(v.4.1.2)
- Rhdf5lib(v.1.16.0)
- ProtGenerics(v.1.26.0)
- RCurl(v.1.98-1.6)
- assertive.numbers(v.0.0-2)
- ks(v.1.13.4)
- hms(v.1.1.1)
- rhdf5(v.2.38.0)
- colorspace(v.2.0-3)
- BiocManager(v.1.30.16)
- ggbeeswarm(v.0.6.0)
- shape(v.1.4.6)
- assertive.files(v.0.0-2)
- sass(v.0.4.0)
- Rcpp(v.1.0.8)
- bookdown(v.0.24)
- RANN(v.2.6.1)
- mvtnorm(v.1.1-3)
- circlize(v.0.4.14)
- fansi(v.1.0.2)
- parallelly(v.1.30.0)
- R6(v.2.5.1)
- lifecycle(v.1.0.1)
- rootSolve(v.1.8.2.3)
- bluster(v.1.4.0)
- curl(v.4.3.2)
- leiden(v.0.3.9)
- jquerylib(v.0.1.4)
- snakecase(v.0.11.0)
- Matrix(v.1.4-0)
- RcppAnnoy(v.0.0.19)
- RColorBrewer(v.1.1-2)
- iterators(v.1.0.14)
- stringr(v.1.4.0)
- R.cache(v.0.15.0)
- htmlwidgets(v.1.5.4)
- beachmat(v.2.10.0)
- polyclip(v.1.10-0)
- purrr(v.0.3.4)
- gridGraphics(v.0.5-1)
- rvest(v.1.0.2)
- mgcv(v.1.8-38)
- globals(v.0.14.0)
- spatstat.random(v.2.1-0)
- codetools(v.0.2-18)
- lubridate(v.1.8.0)
- FNN(v.1.1.3)
- metapod(v.1.2.0)
- prettyunits(v.1.1.1)
- MCMCprecision(v.0.4.0)
- R.methodsS3(v.1.8.1)
- RSpectra(v.0.16-0)
- gtable(v.0.3.0)
- DBI(v.1.1.2)
- highr(v.0.9)
- tensor(v.1.5)
- httr(v.1.4.2)
- KernSmooth(v.2.23-20)
- stringi(v.1.7.6)
- progress(v.1.2.2)
- farver(v.2.1.0)
- reshape2(v.1.4.4)
- diptest(v.0.76-0)
- viridis(v.0.6.2)
- magick(v.2.7.3)
- xml2(v.1.3.3)
- combinat(v.0.0-8)
- BiocNeighbors(v.1.12.0)
- restfulr(v.0.0.13)
- ade4(v.1.7-18)
- scattermore(v.0.8)
- BiocVersion(v.3.14.0)
- bit(v.4.0.4)
- spatstat.data(v.2.1-2)
- pkgconfig(v.2.0.3)
- knitr(v.1.37)